package it.uniroma2.svd.writer;


import it.uniroma2.svd.DataInterface;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.util.Hashtable;

import it.uniroma2.svd.writer.DataPlusInputStream;
import it.uniroma2.svd.writer.DataPlusOutputStream;
import it.uniroma2.svd.writer.FileTools;


/**
 * Binary implementation of {@link FileTools}: reads and writes LSA token
 * matrices in a compact binary layout.
 *
 * <p>Layout produced by the writers and consumed by {@link #loadToken(String)}:
 * <pre>
 *   int   row                 number of rows that follow
 *   int   column              number of floats in every row vector
 *   repeated row times:
 *     short  length           length of the feature name
 *     string feature          written via DataPlusOutputStream.writeString
 *     float  weight           0 for documents, the word weight for words
 *     float  (always written as 0 by this class)
 *     float[column]           the vector of the row
 * </pre>
 *
 * <p>The class keeps a single output handle in {@link #out}; nothing in this
 * class synchronizes access to it.
 */
public class FileToolsBin implements FileTools {

	/**
	 * Characters removed from a feature before it is used as a lookup key by
	 * {@link #loadToken(String)}. Compiled once instead of once per row.
	 */
	private static final java.util.regex.Pattern KEY_CLEANER =
			java.util.regex.Pattern.compile("[(),.;!?\"%'\\/{}~^]");

	/** Placeholder feature written when the name cannot be obtained from the data source. */
	private static final String MISSING_FEATURE = "ERROR";

	/** Output handle shared by {@link #writeTokenLine} and {@link #writeToken}. */
	DataPlusOutputStream out = null;

	/**
	 * Opens the output channel used by the binary writing methods.
	 *
	 * @param PathOutput path of the file to create or overwrite
	 * @throws Exception if the file cannot be opened for writing
	 */
	public void initHandle (String PathOutput) throws  Exception {
		out = new DataPlusOutputStream(new FileOutputStream(PathOutput) );
	}

	/**
	 * Flushes and closes the output channel. Does nothing when no channel has
	 * been opened, so it is safe to call from cleanup code.
	 *
	 * @throws Exception if flushing or closing the stream fails
	 */
	public void closeHandle () throws  Exception {
		if (out == null) {
			return;
		}
		out.flush();
		out.close();
	}

	/**
	 * Loads a binary LSA matrix file.
	 *
	 * @param PathInput path of the binary file to read
	 * @return a two-element array: element 0 is an {@code Object[row][4]} whose
	 *         rows hold {feature (String), Float, Float, float[] vector};
	 *         element 1 is a {@code Hashtable<String, Integer>} mapping the
	 *         cleaned feature name to its row index
	 * @throws Exception if the file cannot be opened or read
	 */
	public Object[] loadToken (String PathInput)  throws  Exception {

		System.out.println("Loading "+PathInput+" (every '.' correspond to 1000 lines loaded ");
		Hashtable<String, Integer> dataHash = new Hashtable<String, Integer>();
		Object[][] data;

		DataPlusInputStream in = new DataPlusInputStream(new File(PathInput) );
		try {
			int row = in.readInt();
			int column = in.readInt();

			data = new Object[row][4];

			// Two leading floats (weight + one extra float) followed by the vector.
			byte[] buffer = new byte[column*4 + 8];

			for (int i = 0; i < row; i++) {

				short lungStr = in.readShort();
				String feature = in.readString(lungStr);
				data[i][0] = feature;

				// NOTE(review): the number of bytes actually read is ignored, as in
				// the original code. If DataPlusInputStream can return short reads,
				// this should become a read-fully loop - confirm against that class.
				in.read(buffer);
				DataInputStream fields = new DataInputStream(new ByteArrayInputStream(buffer));

				data[i][1] = Float.valueOf(fields.readFloat());
				data[i][2] = Float.valueOf(fields.readFloat());

				float[] matrixData = new float[column];
				for (int j = 0; j < column; j++) {
					matrixData[j] = fields.readFloat();
				}
				data[i][3] = matrixData;

				fields.close();

				// Cleaner: meant to be removed once the input data is clean.
				// Note that data[i][0] keeps the uncleaned name; only the key is cleaned.
				feature = KEY_CLEANER.matcher(feature).replaceAll("");

				// This is the Hashtable used to look up words in the matrices.
				dataHash.put(feature, Integer.valueOf(i));

				if (i % 1000 == 0) {
					System.out.print(".");
				}
			}
		} finally {
			// Release the file handle even when the file is truncated or corrupt.
			in.close();
		}

		System.out.println(" ... Loaded");

		// data array, key hashtable
		return new Object[] { data, dataHash };
	}

	/**
	 * Writes one LSA vector (binary format) to the handle opened with
	 * {@link #initHandle(String)}. When {@code index} is 0 the row/column header
	 * is written first.
	 *
	 * <p>If writing the feature name fails, the error is printed to
	 * {@code System.err} and the method returns without writing the vector.
	 *
	 * @param matrixData the vector of the row being written
	 * @param index the row index (document or word id)
	 * @param data source of document/word names, sizes and word weights
	 * @param type DOC or WORD
	 * @throws Exception if the data source or the output stream fails
	 */
	public void writeTokenLine(float[] matrixData,
			int index,
			DataInterface data,
			FileTools.type type) throws Exception {

		int column = matrixData.length;
		int row = 0;

		switch (type) {
		case DOC:
			row = data.documentSize();
			break;
		case WORD:
			row = data.termSize();
			break;
		default:
			break;
		}

		String feature = lookupFeature(data, index, type);

		// The header (num_rows num_columns) goes at the top of the matrix.
		if (index == 0) {
			out.writeInt(row);
			out.writeInt(column);
		}

		float weight = lookupWeight(data, index, type);

		if (!writeFeature(feature)) {
			return;
		}

		// Assemble weight + vector in memory and emit them with a single write.
		ByteArrayOutputStream rowBytes = new ByteArrayOutputStream(column*4 + 8);
		DataOutputStream rowOut = new DataOutputStream(rowBytes);

		rowOut.writeFloat(weight);
		rowOut.writeFloat(0.0F);
		for (int n = 0; n < column; n++) {
			rowOut.writeFloat( matrixData[n] );
		}
		rowOut.flush();

		out.write(rowBytes.toByteArray());

		rowOut.close();

		if (index % 1000 == 0) {
			System.out.print(".");
		}
	}

	/**
	 * Writes a whole LSA matrix (binary format) to {@code pathOut}. The file is
	 * opened and closed by this method; the stream is closed on every exit path.
	 * An empty matrix produces a file containing only the header {@code 0 0}.
	 *
	 * <p>If writing a feature name fails, the error is printed to
	 * {@code System.err} and writing stops, leaving a partial file.
	 *
	 * @param matrixData the matrix, one vector per row; the column count is taken from row 0
	 * @param pathOut path of the file to create or overwrite
	 * @param data source of document/word names and word weights
	 * @param type DOC or WORD
	 * @throws Exception if the file cannot be opened or a write fails
	 */
	public void writeToken(float[][] matrixData, String pathOut,  DataInterface data,  FileTools.type type) throws Exception {

		out = new DataPlusOutputStream(new FileOutputStream(pathOut) );
		try {
			int row = matrixData.length;
			int column = (row == 0) ? 0 : matrixData[0].length;

			System.out.println("row: "+row+" column: "+column);
			System.out.println("path Out writing in progress (One dot every 1000 lines): "+pathOut);

			// The header (num_rows num_columns) goes at the top of the matrix,
			// also when there are no rows, so that the file stays loadable.
			out.writeInt(row);
			out.writeInt(column);

			for (int index = 0; index < row; index++) {

				String feature = lookupFeature(data, index, type);
				float weight = lookupWeight(data, index, type);

				if (!writeFeature(feature)) {
					return;
				}

				out.writeFloat(weight);
				out.writeFloat(0.0F);

				for (int n = 0; n < column; n++) {
					out.writeFloat( matrixData[index][n] );
				}

				if (index % 1000 == 0) {
					System.out.print(".");
				}
			}
		} finally {
			// Close on success, on the early return above, and on any exception.
			out.close();
		}
	}

	/**
	 * Resolves the name of the document or word with the given id. Lookup
	 * failures (exception or null result) are reported on {@code System.err}
	 * and replaced by the placeholder {@code "ERROR"}.
	 *
	 * @return the feature name, the placeholder, or null for a type other than DOC/WORD
	 */
	private static String lookupFeature(DataInterface data, int index, FileTools.type type) {
		final String kind;
		switch (type) {
		case DOC:
			kind = "doc";
			break;
		case WORD:
			kind = "word";
			break;
		default:
			return null;
		}

		String feature = null;
		try {
			feature = (type == FileTools.type.DOC) ? data.getDoc(index) : data.getWord(index);
		} catch (Exception e) {
			e.printStackTrace();
		}

		if (feature == null) {
			System.err.println("Error accessing DATA: Search " + kind + " by id " + index);
			feature = MISSING_FEATURE;
		}
		return feature;
	}

	/**
	 * Returns the weight stored with a row: 0 for documents, otherwise the word
	 * weight from the data source (0 when the source has none).
	 */
	private static float lookupWeight(DataInterface data, int index, FileTools.type type) throws Exception {
		if (type == FileTools.type.DOC) {
			return 0.0F;
		}
		// The Float constructor is kept on purpose: the declared return type of
		// getWordWeight is not visible here and Float.valueOf has no double overload.
		return (data.getWordWeight(index) != null) ? (new Float(data.getWordWeight(index))) : 0.0F;
	}

	/**
	 * Writes the length-prefixed feature name to {@link #out}.
	 *
	 * @return true on success; false when the write failed (already reported on System.err)
	 */
	private boolean writeFeature(String feature) {
		try {
			// NOTE(review): the length is narrowed to a short; names longer than
			// Short.MAX_VALUE would get a wrong prefix - confirm this cannot happen.
			out.writeShort((short)feature.length());
			out.writeString(feature, feature.length());
			return true;
		} catch (Exception e) {
			e.printStackTrace();
			System.err.println("Error writing DATA: feature "+feature + " -- out "+(out==null?"null":"not null"));
			return false;
		}
	}

}
